#### Script to do multiple treeMI imputations for manufacturing industry NAICS 331111-332999
### and export the imputed datasets

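#### input files:
####   gooddata_all_inds02.csv  (the data to be imputed; must contain NAICS_NEW_6 and the nine analysis variables)
####   cmf02_inds.csv           (industry lookup with columns `number` and `NAICS_NEW_6`)

#### output files:
####   imputes_all_inds02_331_332.csv  (all imputed datasets stacked together; `impsetnum` identifies the imputation)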

## library() stops immediately if the package is not installed; require()
## would only warn and return FALSE, letting the script fail much later.
## (tree is presumably what treeMI() builds its trees with -- TODO confirm.)
library(tree)

### Read in the entire 2007 CMF dataset (where items imputed by industry average ratio or univariate regression have been made missing)
### NOTE(review): the input file names carry an "02" suffix while this comment says 2007 -- confirm which census year is meant.

gooddata <- read.csv("gooddata_all_inds02.csv", header = TRUE)

## Keep only the variables used below, in a fixed and known column order:
mygooddata <- gooddata[c("NAICS_NEW_6", "TAE", "CM", "TE", "PH", "SW", "TVS",
                         "dinv", "energycmratio", "wwswratio")]

## Industry lookup table; used below through its `number` and `NAICS_NEW_6` columns.
industries <- read.csv("cmf02_inds.csv", header = TRUE)

### Loop through industries: numbers 237-305 in the industry lookup table
### correspond to NAICS 331111 to 332999.
###
### For every industry whose data contain at least one missing value, draw
### `n_impsets` imputed datasets with treeMI(), tag each with its imputation
### number (`impsetnum`) and stack them all in one CSV file.
###
### NOTE(review): treeMI() is not defined in this file; it must already be in
### scope (e.g. via source()) before this section runs.

outfile <- "imputes_all_inds02_331_332.csv"
industry_numbers <- 237:305
n_impsets <- 100

## Third (positional) argument handed to treeMI(): one entry per column of
## treeData, with only the first column (NAICS_NEW_6) flagged.
## NOTE(review): presumably marks variables treeMI should treat specially -- confirm.
var_flags <- c(1,0,0,0,0,0,0,0,0,0)

## FALSE until the first dataset has been written. The first write (whichever
## industry it comes from) creates the file afresh with a header row; every
## later write appends without a header. Previously the header was only
## written when industry 237 had missing values, so otherwise the rows were
## appended, header-less, to whatever file was left over from an earlier run.
outfile_started <- FALSE

for (i in industry_numbers) {

    industry <- industries[industries$number == i, c("NAICS_NEW_6")]
    ## %in% never returns NA, so records with a missing NAICS code are not
    ## pulled in as all-NA rows the way `==` subsetting would pull them in.
    treeData <- mygooddata[mygooddata$NAICS_NEW_6 %in% industry, ]

    ## We only want to run treeMI if the original data has some missing values, so first check for missing values
    missingcounter <- 0
    for (Var in names(treeData)) {
        missing <- sum(is.na(treeData[, Var]))
        missingcounter <- missingcounter + missing
        print(c(industry, Var, missing))
    }

    if (missingcounter > 0) {
        for (j in seq_len(n_impsets)) {
            ## minDev/startDev are 0.00001 for every call. The original used
            ## 0.000001 for the first imputation of industries 238-305 only,
            ## which looks like a typo -- NOTE(review): confirm intended value.
            imputes <- treeMI(treeData, ITER = 5, var_flags, starter = TRUE,
                              PPDdraw = FALSE, minCut = 5, minDev = 0.00001,
                              startCut = 5, startDev = 0.00001)
            imputes$impsetnum <- j
            write.table(imputes, file = outfile, append = outfile_started,
                        sep = ",", col.names = !outfile_started)
            outfile_started <- TRUE
        }
    }

}

if (!outfile_started) {
    ## Nothing was imputed, so any existing output file is from an earlier run.
    warning("No industry had missing values; ", outfile, " was not written",
            call. = FALSE)
}
